package com.daervin.svc.parser.sub;

import com.daervin.svc.common.constants.Constants;
import com.daervin.svc.common.dto.NewsDTO;
import org.springframework.util.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.selector.Html;

import java.util.List;

import static com.daervin.svc.common.constants.SourceEnum.TECH_NODE;

/**
 * Sub-page parser that turns a TechNode article page into a {@link NewsDTO}.
 *
 * <p>The title, publish date and body paragraphs are read from the page HTML via XPath.
 * Pages that lack any of these, whose title carries one of the excluded markers, or whose
 * cleaned body still contains an {@code <img} tag are rejected.
 *
 * @author daervin
 * @version 1.0.0
 * @since 1.0.0
 */
public class TechNodeSubPageParser extends RootSubParser {

    /**
     * Creates the parser.
     *
     * @param listUrl values forwarded unchanged to {@link RootSubParser}
     *                (presumably the list-page URLs this parser is bound to; confirm against the parent class)
     */
    public TechNodeSubPageParser(String... listUrl) {
        super(listUrl);
    }

    /**
     * Extracts a news item from the given page and stores it in the page's result fields
     * under {@link Constants#PARSER_RESULT_ITEM}.
     *
     * @param page the downloaded page to parse
     * @return {@code MatchOther.YES} when a news item was extracted and stored,
     *         {@code MatchOther.NO} when the page was rejected
     */
    @Override
    public MatchOther processPage(Page page) {
        Html html = page.getHtml();
        String title = html.xpath("//*[@class=\"header-title\"]//span//text()").get();
        String dateTime = html.xpath("//div[@class=\"post-info\"]/div[@class=\"date-info\"]/text()").get();
        // Reject pages without a title/date, and titles carrying one of the excluded markers.
        if (StringUtils.isEmpty(title) || title.contains("早8点") || title.contains("早 8 点档") || StringUtils.isEmpty(dateTime)) {
            return MatchOther.NO;
        }

        List<String> descPList = html.xpath("//div[@class='post-content']/p").all();
        // StringUtils.isEmpty(Object) only matches null or the String "", so it can never
        // detect an empty List; check the collection directly.
        if (descPList == null || descPList.isEmpty()) {
            return MatchOther.NO;
        }

        String desc = buildDescription(descPList);
        // Likewise, an empty StringBuilder is never equal to "", so emptiness is tested on
        // the resulting String. This also rejects pages where every paragraph was filtered out.
        if (desc.isEmpty() || desc.contains("<img")) {
            return MatchOther.NO;
        }

        NewsDTO news = new NewsDTO();
        news.setTitle(title);
        news.setDesc(desc);
        // The date is stored with '-' separators instead of '/'.
        news.setBelongDate(dateTime.replace('/', '-'));
        news.setAnnouncer(TECH_NODE.announcer);
        news.setLinks(page.getRequest().getUrl());
        news.setCategory(TECH_NODE.category);

        page.putField(Constants.PARSER_RESULT_ITEM, news);
        return MatchOther.YES;
    }

    /**
     * Concatenates the cleaned text of the given paragraph fragments.
     *
     * <p>Each fragment is trimmed and stripped of {@code <p>}, {@code </p>}, {@code </a>} and
     * opening {@code <a ... href="...">} tags. Fragments that contain an image-credit marker
     * or {@code &nbsp;} are dropped entirely.
     *
     * @param paragraphs raw HTML of the paragraph elements
     * @return the joined text, or an empty string when nothing was kept
     */
    private static String buildDescription(List<String> paragraphs) {
        StringBuilder desc = new StringBuilder();
        for (String paragraph : paragraphs) {
            String cleaned = paragraph.trim()
                    .replaceAll("(<p>|</p>|</a>)", "")
                    .replaceAll("<a\\b[^>]+\\bhref=\"([^\"]*)\"[^>]*>", "");
            if (cleaned.contains("图片：") || cleaned.contains("图片来自") || cleaned.contains("&nbsp;")) {
                continue;
            }
            desc.append(cleaned);
        }
        return desc.toString();
    }
}
